This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
What does Ljung box-statistic do; should they be significant or not? Should residuals look normal? Are the * in eacf mean its significant? What should ACF look like for stationarity?
library(TSA)
Loading required package: leaps
Loading required package: locfit
locfit 1.5-9.1 2013-03-22
Loading required package: mgcv
Loading required package: nlme
This is mgcv 1.8-22. For overview type 'help("mgcv-package")'.
Loading required package: tseries
‘tseries’ version: 0.10-42
‘tseries’ is a package for time series analysis and computational
finance.
See ‘library(help="tseries")’ for details.
Attaching package: ‘TSA’
The following objects are masked from ‘package:stats’:
acf, arima
The following object is masked from ‘package:utils’:
tar
library(tseries)
library(astsa)
library(imputeTS)
Attaching package: ‘imputeTS’
The following object is masked from ‘package:tseries’:
na.remove
library(tsoutliers)
library(xts)
Loading required package: zoo
Attaching package: ‘zoo’
The following object is masked from ‘package:imputeTS’:
na.locf
The following objects are masked from ‘package:base’:
as.Date, as.Date.numeric
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.
terror2 <- read.csv("input/og_num_casualities_greater_than_10.csv")
terror3 <- na.interpolation(terror2$num.attacks.with.kill.thresh, option="linear")
plot(as.xts(ts(terror3, frequency = 12, start=1970)), main = "Number of Terrorist Attacks (w/ Linear Imputed Data)", major.format = "%Y-%m", grid.col="white", lwd=1, major.ticks = "years", ylim=c(0, 225), col="red")
pdf("image/og_ts.pdf")
lines(as.xts(ts(terror2$num.attacks.with.kill.thresh, frequency = 12, start=1970)), col="black")
dev.off()
quartz_off_screen
2
#pdf("image/imputed_ts.pdf")
#plot(as.xts(ts(terror3, frequency = 12, start=1970)), main = "Number of Terrorist Attacks (w/ Linear Imputed Data)", major.format = "%Y-%m", grid.col="white", lwd=1, major.ticks = "years", ylim=c(0, 225))
#dev.off()
outlier_terror3 <- tso(ts(terror3), types = c("TC", "AO", "IO"))
stopped when 'maxit.oloop = 4' was reached
plot(outlier_terror3)
#plot outlier effects
pdf("image/outlier_effects.pdf")
plot(as.xts(ts(outlier_terror3$effects, frequency = 12, start=1970)), main = "Outlier Effects", major.format = "%Y-%m", grid.col="white", lwd=1, major.ticks = "years", col="red")
dev.off()
quartz_off_screen
2
#Plot outlier time series
xts.terror3 <- as.xts(ts(terror3, frequency = 12, start=1970))
plot(as.xts(ts(terror3, frequency = 12, start=1970)), main = "Number of Terrorist Attacks (Outliers Removed)", major.format = "%Y-%m", grid.col="white", lwd=1, major.ticks = "years", col="lightgray", ylim=c(0, 225))
lines(as.xts(ts(outlier_terror3$yadj, frequency = 12, start=1970)), col="blue")
points(xts.terror3[427], col="red",pch=19, cex=1)
points(xts.terror3[516], col="red",pch=19, cex=1)
points(xts.terror3[521], col="red",pch=19, cex=1)
points(xts.terror3[523], col="red",pch=19, cex=1)
points(xts.terror3[547], col="red",pch=19, cex=1)
pdf("image/outlier_comparison.pdf")
points(xts.terror3[556], col="red",pch=19, cex=1)
dev.off()
quartz_off_screen
2
terror4 <- outlier_terror3$yadj
#terror3 <- na.kalman(terror2$num.attacks, model="auto.arima")
cuttoff.index <- length(terror4) - 48 #floor(0.1 * length(terror3))
cuttoff.index2 <- length(terror4) - 12
terror4.valid <- terror4[(cuttoff.index+1) :cuttoff.index2]
terror4.testing <- terror4[(cuttoff.index2 + 1): length(terror4)]
terror4 <- terror4[1: cuttoff.index]
#plot(as.xts(ts(terror4, frequency = 12, start=1970)), main = "Number of Terrorist Attacks (Training Set)", major.format = "%Y-%m", grid.col="white", lwd=1, major.ticks = "years")
#plot(as.xts(ts(terror4.valid, frequency = 12, start=1970)), main = "Number of Terrorist Attacks (Validation Set)", major.format = "%Y-%m", grid.col="white", lwd=1)
#log_terror4 <- log(outlier_terror3$yadj)
adf.test(terror4, k=1)
p-value smaller than printed p-value
Augmented Dickey-Fuller Test
data: terror4
Dickey-Fuller = -5.8969, Lag order = 1, p-value = 0.01
alternative hypothesis: stationary
adf.test(diff(terror4), k=1)
p-value smaller than printed p-value
Augmented Dickey-Fuller Test
data: diff(terror4)
Dickey-Fuller = -24.495, Lag order = 1, p-value = 0.01
alternative hypothesis: stationary
adf.test(diff(diff(terror4)), k=1)
p-value smaller than printed p-value
Augmented Dickey-Fuller Test
data: diff(diff(terror4))
Dickey-Fuller = -32.125, Lag order = 1, p-value = 0.01
alternative hypothesis: stationary
pdf("image/first_diff.pdf")
plot(as.xts(ts(diff(terror4), frequency = 12, start=1970)), main = "Number of Terrorist Attacks (First Diff)", major.format = "%Y-%m", grid.col="white", lwd=1, major.ticks = "years")
dev.off()
null device
1
pdf("image/second_diff.pdf")
plot(as.xts(ts(diff(diff(terror4)), frequency = 12, start=1970)), main = "Number of Terrorist Attacks (Second Diff)", major.format = "%Y-%m", grid.col="white", lwd=1, major.ticks = "years")
dev.off()
null device
1
#ts.plot(diff(terror4))
#ts.plot(diff(diff(terror4)))
m = floor(sqrt(length(diff(terror4))))
pdf("image/raw_periodogram.pdf")
mvspec(diff(terror4), log="no", main="Raw Periodogram")
dev.off()
null device
1
pdf("image/smooth_tapered_periodogram.pdf")
mvspec(diff(terror4), kernel('daniell', m), log="no", taper=0.1, main="Smoothed and Tapered Periodogram")
dev.off()
null device
1
#mvspec(diff(log_terror4), kernel('daniell', m), log="no")
acf(terror4, main="ACF of Training Data")
pacf(terror4, main="PACF of Training Data")
acf(diff(terror4), main="ACF of First Diff Training Data")
pacf(diff(terror4), main="PACF of Second Diff Training Data")
eacf(diff(terror4))
AR/MA
0 1 2 3 4 5 6 7 8 9 10 11 12 13
0 x o x x o o o o o o o o o x
1 x x o x o o o o o o o o o o
2 x x x x o o o o o o o o o o
3 x x x x o o o o o o o o o o
4 x o o o o o o o o o o o o o
5 x x o o o o o o o o o o o o
6 x x o o o o o o o o o o o o
7 x x o o o o o o o o o o o o
eacf(diff(diff(terror4)))
AR/MA
0 1 2 3 4 5 6 7 8 9 10 11 12 13
0 x x x x o o o o o o o o o x
1 x x x x o o o o o o o o o o
2 x x x x o o o o o o o o o o
3 x o o o o o o o o o o o o o
4 x x o o o o o o o o o o o o
5 x x o o o o x o o o o o o o
6 x x x o o o o o o o o o o o
7 x x o o o o o o o o o o o o
sarima(terror4, 0, 1, 1)
initial value 2.302665
iter 2 value 2.148680
iter 3 value 2.132396
iter 4 value 2.128997
iter 5 value 2.121714
iter 6 value 2.120506
iter 7 value 2.120430
iter 8 value 2.120364
iter 9 value 2.120362
iter 10 value 2.120362
iter 10 value 2.120362
iter 10 value 2.120362
final value 2.120362
converged
initial value 2.120944
iter 2 value 2.120943
iter 3 value 2.120942
iter 3 value 2.120942
iter 3 value 2.120942
final value 2.120942
converged
$fit
Call:
stats::arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D,
Q), period = S), xreg = constant, optim.control = list(trace = trc, REPORT = 1,
reltol = tol))
Coefficients:
ma1 constant
-0.6746 0.1596
s.e. 0.0336 0.1200
sigma^2 estimated as 69.46: log likelihood = -1823.04, aic = 3652.08
$degrees_of_freedom
[1] 513
$ttable
Estimate SE t.value p.value
ma1 -0.6746 0.0336 -20.0780 0.0000
constant 0.1596 0.1200 1.3306 0.1839
$AIC
[1] 5.248457
$AICc
[1] 5.252424
$BIC
[1] 4.264915
sarima(terror4, 0, 1, 1, 1, 0, 1, 4)
initial value 2.306485
iter 2 value 2.160550
iter 3 value 2.137688
iter 4 value 2.128502
iter 5 value 2.123569
iter 6 value 2.120401
iter 7 value 2.119951
iter 8 value 2.119925
iter 9 value 2.119914
iter 10 value 2.119906
iter 11 value 2.119902
iter 12 value 2.119881
iter 13 value 2.119531
iter 14 value 2.119158
iter 15 value 2.118895
iter 16 value 2.118789
iter 17 value 2.118773
iter 18 value 2.118748
iter 19 value 2.118211
iter 20 value 2.116368
iter 21 value 2.115097
iter 22 value 2.114878
iter 23 value 2.114721
iter 24 value 2.114670
iter 25 value 2.114631
iter 26 value 2.114584
iter 27 value 2.114555
iter 28 value 2.114550
iter 29 value 2.114550
iter 29 value 2.114550
final value 2.114550
converged
initial value 2.111537
iter 2 value 2.111531
iter 3 value 2.111529
iter 4 value 2.111529
iter 5 value 2.111529
iter 6 value 2.111529
iter 7 value 2.111528
iter 8 value 2.111528
iter 9 value 2.111528
iter 9 value 2.111528
iter 9 value 2.111528
final value 2.111528
converged
$fit
Call:
stats::arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D,
Q), period = S), xreg = constant, optim.control = list(trace = trc, REPORT = 1,
reltol = tol))
Coefficients:
ma1 sar1 sma1 constant
-0.6863 -0.7178 0.8165 0.1595
s.e. 0.0346 0.0962 0.0774 0.1212
sigma^2 estimated as 68.13: log likelihood = -1818.19, aic = 3646.38
$degrees_of_freedom
[1] 511
$ttable
Estimate SE t.value p.value
ma1 -0.6863 0.0346 -19.8500 0.0000
sar1 -0.7178 0.0962 -7.4641 0.0000
sma1 0.8165 0.0774 10.5431 0.0000
constant 0.1595 0.1212 1.3163 0.1887
$AIC
[1] 5.236941
$AICc
[1] 5.241045
$BIC
[1] 4.269857
sarima(terror4, 0, 1, 1, 1, 1, 1, 4)
initial value 2.585951
iter 2 value 2.295422
iter 3 value 2.231925
iter 4 value 2.195223
iter 5 value 2.174548
iter 6 value 2.150154
iter 7 value 2.141586
iter 8 value 2.141306
iter 9 value 2.141144
iter 10 value 2.141058
iter 11 value 2.141057
iter 11 value 2.141057
iter 11 value 2.141057
final value 2.141057
converged
initial value 2.141567
iter 2 value 2.139146
iter 3 value 2.138306
iter 4 value 2.138235
iter 5 value 2.138226
iter 5 value 2.138226
iter 5 value 2.138226
final value 2.138226
converged
$fit
Call:
stats::arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D,
Q), period = S), include.mean = !no.constant, optim.control = list(trace = trc,
REPORT = 1, reltol = tol))
Coefficients:
ma1 sar1 sma1
-0.6846 0.0988 -1.0000
s.e. 0.0362 0.0450 0.0171
sigma^2 estimated as 69.26: log likelihood = -1817.71, aic = 3643.42
$degrees_of_freedom
[1] 508
$ttable
Estimate SE t.value p.value
ma1 -0.6846 0.0362 -18.8959 0.0000
sar1 0.0988 0.0450 2.1935 0.0287
sma1 -1.0000 0.0171 -58.6006 0.0000
$AIC
[1] 5.249478
$AICc
[1] 5.253506
$BIC
[1] 4.274165
sarima(terror4, 0, 1, 1, 1, 1, 2, 4)
initial value 2.585951
iter 2 value 2.271150
iter 3 value 2.221817
iter 4 value 2.190039
iter 5 value 2.158348
iter 6 value 2.137558
iter 7 value 2.136854
iter 8 value 2.134312
iter 9 value 2.132861
iter 10 value 2.132729
iter 11 value 2.132550
iter 12 value 2.130744
iter 13 value 2.127256
iter 14 value 2.126990
iter 15 value 2.126819
iter 16 value 2.126642
iter 17 value 2.125945
iter 18 value 2.125863
iter 19 value 2.125557
iter 20 value 2.125495
iter 21 value 2.125487
iter 22 value 2.125487
iter 22 value 2.125487
final value 2.125487
converged
initial value 2.133766
iter 2 value 2.133364
iter 3 value 2.133341
iter 4 value 2.133331
iter 5 value 2.133324
iter 6 value 2.133285
iter 7 value 2.133273
iter 8 value 2.133272
iter 8 value 2.133272
iter 8 value 2.133272
final value 2.133272
converged
$fit
Call:
stats::arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D,
Q), period = S), include.mean = !no.constant, optim.control = list(trace = trc,
REPORT = 1, reltol = tol))
Coefficients:
ma1 sar1 sma1 sma2
-0.6840 -0.7141 -0.1856 -0.8144
s.e. 0.0348 0.0974 0.0807 0.0800
sigma^2 estimated as 68.5: log likelihood = -1815.18, aic = 3640.36
$degrees_of_freedom
[1] 507
$ttable
Estimate SE t.value p.value
ma1 -0.6840 0.0348 -19.6474 0.0000
sar1 -0.7141 0.0974 -7.3348 0.0000
sma1 -0.1856 0.0807 -2.2985 0.0219
sma2 -0.8144 0.0800 -10.1848 0.0000
$AIC
[1] 5.242331
$AICc
[1] 5.246435
$BIC
[1] 4.275247
sarima(terror4, 0, 1, 1, 2, 1, 2, 4)
initial value 2.589883
iter 2 value 2.260830
iter 3 value 2.211578
iter 4 value 2.166702
iter 5 value 2.161786
iter 6 value 2.145094
iter 7 value 2.143648
iter 8 value 2.132206
iter 9 value 2.131824
iter 10 value 2.131740
iter 11 value 2.131720
iter 12 value 2.131710
iter 13 value 2.131709
iter 14 value 2.131701
iter 15 value 2.131677
iter 16 value 2.131676
iter 17 value 2.131668
iter 18 value 2.131654
iter 19 value 2.131568
iter 20 value 2.131267
iter 21 value 2.130868
iter 22 value 2.130308
iter 23 value 2.129494
iter 24 value 2.126771
iter 25 value 2.123108
iter 26 value 2.118333
iter 27 value 2.117334
iter 28 value 2.115281
iter 29 value 2.114663
iter 30 value 2.114534
iter 31 value 2.114314
iter 32 value 2.114259
iter 33 value 2.114252
iter 33 value 2.114252
iter 33 value 2.114252
final value 2.114252
converged
initial value 2.140151
iter 2 value 2.138317
iter 3 value 2.137952
iter 4 value 2.137943
iter 5 value 2.137940
iter 6 value 2.137940
iter 7 value 2.137937
iter 8 value 2.137927
iter 9 value 2.137902
iter 10 value 2.137786
iter 11 value 2.137720
iter 12 value 2.137647
iter 13 value 2.137630
iter 14 value 2.137629
iter 15 value 2.137622
iter 16 value 2.137566
iter 17 value 2.137345
iter 18 value 2.137312
iter 19 value 2.137252
iter 20 value 2.137175
iter 21 value 2.137137
iter 22 value 2.137131
iter 23 value 2.136787
iter 24 value 2.136741
iter 25 value 2.136710
iter 26 value 2.136680
iter 27 value 2.136665
iter 28 value 2.136586
iter 29 value 2.136295
iter 30 value 2.135978
iter 31 value 2.135829
iter 32 value 2.135553
iter 33 value 2.135345
iter 34 value 2.134425
iter 35 value 2.133956
iter 36 value 2.133785
iter 37 value 2.133651
iter 38 value 2.133618
iter 39 value 2.133336
iter 40 value 2.133248
iter 41 value 2.133122
iter 42 value 2.133078
iter 43 value 2.133071
iter 44 value 2.133070
iter 45 value 2.133070
iter 45 value 2.133069
final value 2.133069
converged
$fit
Call:
stats::arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D,
Q), period = S), include.mean = !no.constant, optim.control = list(trace = trc,
REPORT = 1, reltol = tol))
Coefficients:
ma1 sar1 sar2 sma1 sma2
-0.6859 -0.7149 0.0223 -0.1711 -0.8288
s.e. 0.0355 0.0932 0.0494 0.0829 0.0822
sigma^2 estimated as 68.5: log likelihood = -1815.08, aic = 3642.15
$degrees_of_freedom
[1] 506
$ttable
Estimate SE t.value p.value
ma1 -0.6859 0.0355 -19.3358 0.0000
sar1 -0.7149 0.0932 -7.6751 0.0000
sar2 0.0223 0.0494 0.4513 0.6520
sma1 -0.1711 0.0829 -2.0656 0.0394
sma2 -0.8288 0.0822 -10.0796 0.0000
$AIC
[1] 5.246148
$AICc
[1] 5.250344
$BIC
[1] 4.287293
#eacf(diff(diff(log_terror4)))
terror5 <- terror4
total_error <- 0
for (i in 1: (length(terror4.valid) - 11))
{
actual <- terror4.valid[i : i + 11]
predicted <- sarima.for(terror5, 12, 0, 1, 1, 1, 0, 0, 4)$pred
total_error <- total_error + sum((actual - predicted)^2)
terror5 <- c(terror5, terror4.valid[i])
}
mse <- total_error / (length(terror4.valid) - 11)
val <- sarima.for(c(terror4, terror4.valid), 12, 0, 1, 1, 1, 0, 0, 4)
pred <-val$pred
err <-val$se
total <- c(terror4, terror4.valid, terror4.testing)
plot(as.xts(ts(total, frequency = 12, start=1970))[492:length(total)], main = "Number of Terrorist Attacks (Training Set)", major.format = "%Y-%m", grid.col="white", lwd=1, major.ticks = "years", col="lightgray", pch="1", ylim=c(0, 225))
points(as.xts(ts(total, frequency = 12, start=1970)),col="lightgray",pch="o")
lines(as.xts(ts(c(terror4, terror4.valid), frequency = 12, start=1970)),col="black")
points(as.xts(ts(c(terror4, terror4.valid), frequency = 12, start=1970)),col="black",pch="o")
lines(as.xts(ts(pred, frequency = 12, start=2016)),col="blue")
lines(as.xts(ts(pred + err, frequency = 12, start=2016)),col="blue", lty="dashed")
lines(as.xts(ts(pred - err, frequency = 12, start=2016)),col="blue", lty="dashed")
lines(as.xts(ts(pred + 2*err, frequency = 12, start=2016)),col="blue", lty="dotted")
lines(as.xts(ts(pred - 2*err, frequency = 12, start=2016)),col="blue", lty="dotted")